######################### Load Libraries #######################################

library(foreign)

library(plyr)

library(gdata)

############################ Data Setup ########################################

# point the session at the project data folder
setwd("~/_data/")

# restore the objects stored in the main LPR workspace image; the code below
# expects one of them to be called `data`
load(file = "lprdata.RData")

# drop every other object from the workspace, keeping only `data`
keep("data", sure = TRUE)

# work on the data under the name `all` for the rest of the script
# (note: this name masks base::all() in the global environment)
all <- data

# SMD dummy: 1 for the six countries listed below, 0 for every other country
all$smd <- as.numeric(
  all$country %in% c("Australia", "Canada", "New Zealand", "UK", "USA", "France")
)

# text version of the same indicator: "SMD" where smd is 1, "PR" where it is 0
# (smd is always 0 or 1, so smd + 1 indexes the two labels directly)
all$smdF <- c("PR", "SMD")[all$smd + 1]


# generate plurality party change variable
#
# For every row after the first within a country, `change` is 0 when
# Party1Name equals the value in the preceding row of that country and 1
# otherwise. The first row of each country has no predecessor and stays NA,
# as does any row where one of the two names is missing.
# NOTE(review): this compares adjacent rows as stored, so it assumes the rows
# of each country are already in chronological order -- confirm.
all$change <- rep(NA, nrow(all))

for (ctry in unique(all$Country)) {

  # row positions of this country in `all` (which() also drops NA matches)
  rows <- which(all$Country == ctry)
  n_rows <- length(rows)

  # a single-row country has nothing to compare; the former 2:nrow() loop
  # would have counted backwards (2, 1) and failed here
  if (n_rows < 2) next

  winner <- all[rows, "Party1Name"]

  # compare each row (2..n) with the row before it (1..n-1) in one step
  all$change[rows[-1]] <- ifelse(winner[-1] == winner[-n_rows], 0, 1)

}

# per-country median of lossprob_statictau (missing values ignored), sorted
# ascending, with the resulting rank stored in `orderlpr`
lpr_sum <- ddply(all, ~Country, summarize,
                 median = median(lossprob_statictau, na.rm = TRUE))
lpr_sum <- lpr_sum[order(lpr_sum[, 2]), ]
# seq_len() stays empty for an empty summary, unlike seq(1, 0)
lpr_sum$orderlpr <- seq_len(nrow(lpr_sum))

# attach `median` and `orderlpr` to every row of the main data set
# (merge() sorts its result by the key column by default)
all <- merge(all, lpr_sum, by = "Country")

# per-country mean of the plurality party change variable (missing values
# ignored), sorted ascending, with the resulting rank stored in `orderchange`
change_sum <- ddply(all, ~country, summarize,
                    mean = mean(change, na.rm = TRUE))
change_sum <- change_sum[order(change_sum[, 2]), ]
change_sum$orderchange <- seq_len(nrow(change_sum))

# attach `mean` and `orderchange` to every row of the main data set
all <- merge(all, change_sum, by = "country")

# generate plurality party change probability based on historical elections
#
# For row i (i >= 2) of a country, `chprob` is the mean of that country's
# `change` values in rows 1..(i-1), ignoring missing values. The first row of
# each country stays NA; a row whose earlier `change` values are all missing
# gets NaN (mean of nothing).
# NOTE(review): "earlier" means earlier in stored row order, so this assumes
# each country's rows are in chronological order -- confirm.
all$chprob <- rep(NA, nrow(all))

for (ctry in sort(unique(all$Country))) {

  # row positions of this country in `all`
  rows <- which(all$Country == ctry)

  # nothing to compute for a single-row country; the former 2:length() loop
  # would have counted backwards (2, 1) and failed here
  if (length(rows) < 2) next

  # pull the country's change history once instead of re-subsetting the
  # data frame on every iteration
  past_change <- all[rows, "change"]

  all$chprob[rows[-1]] <- vapply(
    seq_len(length(rows) - 1),
    function(k) mean(past_change[seq_len(k)], na.rm = TRUE),
    numeric(1)
  )

}

# vote margin (VoteParty1 minus VoteParty2) for SMD countries only;
# rows with smd == 0 are left missing
all$vmargin <- with(all, ifelse(smd == 1, VoteParty1 - VoteParty2, NA))

# the same vote margin, computed for every country
all$vmarginall <- with(all, VoteParty1 - VoteParty2)

# vote swing: DParty1 minus DParty2
all$vswing <- with(all, DParty1 - DParty2)

############################ Golder Merge ######################################

# NOTE: These data come from:
# Bormann, Nils-Christian and Matt Golder. 2013. "Democratic Electoral Systems around the World, 1946-2011." Electoral Studies 32(2):360-69.
# https://files.nyu.edu/mrg217/public/elections.html

# switch to the folder holding the Golder files
setwd("~/_third_party_data/Golder/")

# read the Stata file with the Golder data
golder <- read.dta(file = "avemag.dta")

# make sure the country identifier is a factor so its level labels can be edited
golder$country <- as.factor(golder$country)

# relabel the 22nd and 23rd country levels so they match the names used in
# the LPR data ("UK" and "USA")
# NOTE(review): this is positional; it assumes levels 22 and 23 are the
# United Kingdom and the United States -- confirm against the file
levels(golder$country)[22] <- "UK"
levels(golder$country)[23] <- "USA"

# left join: keep every LPR row and add the Golder columns where both
# country and election year match
all <- merge(x = all, y = golder, by = c("country", "elecyr"), all.x = TRUE)

# rename the 43rd column of the merged data to "year"
# NOTE(review): positional rename; which variable sits in column 43 depends
# on the column layout of both inputs -- confirm
names(all)[43] <- "year"

############################ Prices Merge ######################################

# NOTE: These are data we updated from the following article. See "price-notes-July_Mat.doc" for documentation.
# Chang, Eric C.C., Mark Andreas Kayser and Ronald Rogowski. 2008. "Electoral Systems and Real Prices: Panel Evidence for the OECD Countries, 1970-2000." British Journal of Political Science 38(4):739-51.

# switch to the folder holding the prices files
setwd("~/_third_party_data/Prices/")

# read the Stata file with the updated prices data
prices <- read.dta(file = "pricesdata_july2013-Mat.dta")

# make sure the country identifier is a factor so its level labels can be edited
prices$country <- as.factor(prices$country)

# relabel the 22nd and 23rd country levels so they match the names used in
# the LPR data ("UK" and "USA")
# NOTE(review): this is positional; it assumes levels 22 and 23 are the
# United Kingdom and the United States -- confirm against the file
levels(prices$country)[22] <- "UK"
levels(prices$country)[23] <- "USA"

# left join onto the prices panel: every prices row is kept, and LPR columns
# are filled in only where country and year both match
lprprices <- merge(x = prices, y = all, by = c("country", "year"), all.x = TRUE)

# fill in missing between-election values of lossprob_statictau, vmarginall
# and seatsharegap by carrying the most recent non-missing value forward
# within each cntyid
#
# Rows are processed in stored order within each cntyid, so a missing value
# takes the value from the closest earlier row that has one. Missing values
# that come before the first observed value of a country stay missing.

# Carry the last non-missing value of `x` forward over the NAs that follow it.
# Returns a vector of the same length; leading NAs are left untouched.
fill_forward <- function(x) {
  # for every position, the index of the latest non-missing entry so far
  # (0 while nothing has been observed yet)
  last_seen <- seq_along(x)
  last_seen[is.na(x)] <- 0L
  last_seen <- cummax(last_seen)

  has_prior <- last_seen > 0
  x[has_prior] <- x[last_seen[has_prior]]
  x
}

fill_vars <- c("lossprob_statictau", "vmarginall", "seatsharegap")

# visit each country once; the former loops iterated over every row's cntyid
# and so repeated the identical fill once per row
country_ids <- unique(lprprices$cntyid)
country_ids <- country_ids[!is.na(country_ids)]

for (id in country_ids) {

  rows <- which(lprprices$cntyid == id)

  for (v in fill_vars) {
    lprprices[rows, v] <- fill_forward(lprprices[rows, v])
  }

}

# combine each original series with its separately collected "_new" series:
# take the "_new" value where it is present, otherwise fall back to the
# original value; results go to cgdp_all, growth_all and import_all
for (stem in c("cgdp", "growth", "import")) {
  updated <- lprprices[[paste0(stem, "_new")]]
  lprprices[[paste0(stem, "_all")]] <- ifelse(is.na(updated),
                                              lprprices[[stem]],
                                              updated)
}

# dummy equal to 1 for years from 2000 onwards, 0 before
lprprices$dec2000 <- as.numeric(lprprices$year >= 2000)

# identify irregular elections indicator
#
# `irreg` is 1 for the listed country/year combinations (and for every year
# of country 21) and 0 otherwise.
#
# `country` is a factor at this point (it is converted with as.factor() before
# the merge), and `==` between a factor and a number compares the level
# *labels* with the number, so `country == 15` can only match a level that is
# literally labelled "15". The comparison is therefore made on the factor's
# integer codes instead.
# NOTE(review): this assumes the numeric codes below are level positions, the
# same positional convention used when levels 22 and 23 are relabelled "UK"
# and "USA" -- confirm against the prices codebook.
country_code <- lprprices$country
if (is.factor(country_code)) {
  country_code <- as.integer(country_code)
}
yr <- lprprices$year

irregular <-
  (country_code == 15 & (yr == 2002 | yr == 2010)) |
  (country_code == 3 & (yr == 2007 | yr == 2010)) |
  (country_code == 6 & yr == 1991) |
  (country_code == 7 & (yr == 1981 | yr == 1993 | yr == 1997)) |
  (country_code == 21)

lprprices$irreg <- ifelse(irregular, 1, 0)

# copy lossprob_statictau into a column with the shorter name `lpr`
# (the original column is kept)
lprprices[["lpr"]] <- lprprices[["lossprob_statictau"]]

# squared LPR term
lprprices[["lpr2"]] <- lprprices[["lpr"]]^2

############################## Save Data #######################################

# return to the project data folder so the image is written there
setwd("~/_data/")

# clear the workspace of everything except the two finished data sets
keep(list = c("all", "lprprices"), sure = TRUE)

# write the remaining workspace objects to a new R image
save.image("lprdata_plus.RData")